{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Update Model Package Approval Status\n",
    "\n",
    "The pipeline that was executed created a Model Package version within the specified Model Package Group. Of particular note, the registration of the model/creation of the Model Package was done so with approval status as `PendingManualApproval`.\n",
    "\n",
    "As part of SageMaker Pipelines, data scientists can register the model with approved/pending manual approval as part of the CI/CD workflow.\n",
    "\n",
    "We can also approve the model using the SageMaker Studio UI or programmatically as shown below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from botocore.exceptions import ClientError\n",
    "\n",
    "import os\n",
    "import sagemaker\n",
    "import logging\n",
    "import boto3\n",
    "import sagemaker\n",
    "import pandas as pd\n",
    "\n",
    "sess = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "region = boto3.Session().region_name\n",
    "\n",
    "sm = boto3.Session().client(service_name=\"sagemaker\", region_name=region)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%store -r pipeline_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(pipeline_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "import time\n",
    "from pprint import pprint\n",
    "\n",
    "executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)[\"PipelineExecutionSummaries\"]\n",
    "pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "print(pipeline_execution_status)\n",
    "\n",
    "while pipeline_execution_status == \"Executing\":\n",
    "    try:\n",
    "        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)[\"PipelineExecutionSummaries\"]\n",
    "        pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "    #        print('Executions for our pipeline...')\n",
    "    #        print(pipeline_execution_status)\n",
    "    except Exception as e:\n",
    "        print(\"Please wait...\")\n",
    "        time.sleep(30)\n",
    "\n",
    "pprint(executions_response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# List Pipeline Execution Steps\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "print(pipeline_execution_status)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_execution_arn = executions_response[0][\"PipelineExecutionArn\"]\n",
    "print(pipeline_execution_arn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)\n",
    "\n",
    "pprint(steps)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# View Registered Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for execution_step in steps[\"PipelineExecutionSteps\"]:\n",
    "    if execution_step[\"StepName\"] == \"RegisterModel\":\n",
    "        model_package_arn = execution_step[\"Metadata\"][\"RegisterModel\"][\"Arn\"]\n",
    "        break\n",
    "print(model_package_arn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_package_update_response = sm.update_model_package(\n",
    "    ModelPackageArn=model_package_arn,\n",
    "    ModelApprovalStatus=\"Approved\",  # Other options are Rejected and PendingManualApproval\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# View Created Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for execution_step in steps[\"PipelineExecutionSteps\"]:\n",
    "    if execution_step[\"StepName\"] == \"CreateModel\":\n",
    "        model_arn = execution_step[\"Metadata\"][\"Model\"][\"Arn\"]\n",
    "        break\n",
    "print(model_arn)\n",
    "\n",
    "model_name = model_arn.split(\"/\")[-1]\n",
    "print(model_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Model Endpoint from Model Registry\n",
    "More details here:  https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry-deploy.html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "timestamp = int(time.time())\n",
    "\n",
    "model_from_registry_name = \"bert-model-from-registry-{}\".format(timestamp)\n",
    "print(\"Model from registry name : {}\".format(model_from_registry_name))\n",
    "\n",
    "model_registry_package_container = {\n",
    "    \"ModelPackageName\": model_package_arn,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "create_model_from_registry_respose = sm.create_model(\n",
    "    ModelName=model_from_registry_name, ExecutionRoleArn=role, PrimaryContainer=model_registry_package_container\n",
    ")\n",
    "pprint(create_model_from_registry_respose)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_from_registry_arn = create_model_from_registry_respose[\"ModelArn\"]\n",
    "model_from_registry_arn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "endpoint_config_name = \"bert-model-from-registry-epc-{}\".format(timestamp)\n",
    "print(endpoint_config_name)\n",
    "\n",
    "create_endpoint_config_response = sm.create_endpoint_config(\n",
    "    EndpointConfigName=endpoint_config_name,\n",
    "    ProductionVariants=[\n",
    "        {\n",
    "            \"InstanceType\": \"ml.m4.xlarge\",\n",
    "            \"InitialVariantWeight\": 1,\n",
    "            \"InitialInstanceCount\": 1,\n",
    "            \"ModelName\": model_name,\n",
    "            \"VariantName\": \"AllTraffic\",\n",
    "        }\n",
    "    ],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_endpoint_name = \"bert-model-from-registry-ep-{}\".format(timestamp)\n",
    "print(\"EndpointName={}\".format(pipeline_endpoint_name))\n",
    "\n",
    "create_endpoint_response = sm.create_endpoint(\n",
    "    EndpointName=pipeline_endpoint_name, EndpointConfigName=endpoint_config_name\n",
    ")\n",
    "print(create_endpoint_response[\"EndpointArn\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.display import display, HTML\n",
    "\n",
    "display(\n",
    "    HTML(\n",
    "        '<b>Review <a target=\"blank\" href=\"https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}\">SageMaker REST Endpoint</a></b>'.format(\n",
    "            region, pipeline_endpoint_name\n",
    "        )\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# _Wait Until the Endpoint is Deployed_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "waiter = sm.get_waiter(\"endpoint_in_service\")\n",
    "waiter.wait(EndpointName=pipeline_endpoint_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# _Wait Until the Endpoint ^^ Above ^^ is Deployed_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# List All Artifacts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from sagemaker.lineage.visualizer import LineageTableVisualizer\n",
    "\n",
    "viz = LineageTableVisualizer(sagemaker.session.Session())\n",
    "\n",
    "for execution_step in reversed(steps[\"PipelineExecutionSteps\"]):\n",
    "    print(execution_step)\n",
    "    # We are doing this because there appears to be a bug of this LineageTableVisualizer handling the Processing Step\n",
    "    if execution_step[\"StepName\"] == \"Processing\":\n",
    "        processing_job_name = execution_step[\"Metadata\"][\"ProcessingJob\"][\"Arn\"].split(\"/\")[-1]\n",
    "        print(processing_job_name)\n",
    "        display(viz.show(processing_job_name=processing_job_name))\n",
    "    elif execution_step[\"StepName\"] == \"Train\":\n",
    "        training_job_name = execution_step[\"Metadata\"][\"TrainingJob\"][\"Arn\"].split(\"/\")[-1]\n",
    "        print(training_job_name)\n",
    "        display(viz.show(training_job_name=training_job_name))\n",
    "    else:\n",
    "        display(viz.show(pipeline_execution_step=execution_step))\n",
    "        time.sleep(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test the Deployed Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from sagemaker.tensorflow.model import TensorFlowPredictor\n",
    "from sagemaker.serializers import JSONLinesSerializer\n",
    "from sagemaker.deserializers import JSONLinesDeserializer\n",
    "\n",
    "predictor = TensorFlowPredictor(\n",
    "    endpoint_name=pipeline_endpoint_name,\n",
    "    sagemaker_session=sess,\n",
    "    model_name=\"saved_model\",\n",
    "    model_version=0,\n",
    "    content_type=\"application/jsonlines\",\n",
    "    accept_type=\"application/jsonlines\",\n",
    "    serializer=JSONLinesSerializer(),\n",
    "    deserializer=JSONLinesDeserializer(),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predict the `star_rating` with Ad Hoc `review_body` Samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = [{\"features\": [\"This is great!\"]}, {\"features\": [\"This is bad.\"]}]\n",
    "\n",
    "predicted_classes = predictor.predict(inputs)\n",
    "\n",
    "for predicted_class in predicted_classes:\n",
    "    print(\"Predicted star_rating: {}\".format(predicted_class))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "\n",
    "df_reviews = pd.read_csv(\n",
    "    \"./data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz\",\n",
    "    delimiter=\"\\t\",\n",
    "    quoting=csv.QUOTE_NONE,\n",
    "    compression=\"gzip\",\n",
    ")\n",
    "\n",
    "df_sample_reviews = df_reviews[[\"review_body\", \"star_rating\"]].sample(n=50)\n",
    "df_sample_reviews = df_sample_reviews.reset_index(drop=True)\n",
    "df_sample_reviews.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_sample_reviews.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "\n",
    "def predict(review_body):\n",
    "    inputs = [{\"features\": [review_body]}]\n",
    "    predicted_classes = predictor.predict(inputs)\n",
    "    return predicted_classes[0][\"predicted_label\"]\n",
    "\n",
    "\n",
    "df_sample_reviews[\"predicted_class\"] = df_sample_reviews[\"review_body\"].map(predict)\n",
    "df_sample_reviews.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Release Resources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sm.delete_endpoint(\n",
    "#      EndpointName=pipeline_endpoint_name\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "\n",
    "<p><b>Shutting down your kernel for this notebook to release resources.</b></p>\n",
    "<button class=\"sm-command-button\" data-commandlinker-command=\"kernelmenu:shutdown\" style=\"display:none;\">Shutdown Kernel</button>\n",
    "\n",
    "<script>\n",
    "try {\n",
    "    els = document.getElementsByClassName(\"sm-command-button\");\n",
    "    els[0].click();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}    \n",
    "</script>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "\n",
    "try {\n",
    "    Jupyter.notebook.save_checkpoint();\n",
    "    Jupyter.notebook.session.delete();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
